Load Files

# Read the stored Manhattan (mn) and Brooklyn (bk) output objects. Both paths
# are relative, so the .rds files are looked up in the working directory.
mn_output <- readRDS(file = "mn_output.rds")
bk_output <- readRDS(file = "bk_output.rds")

Load Necessary Libraries

# pacman has to be installed already; if it is not, run the next line once.
# install.packages("pacman")
library(pacman)
# p_load() attaches every package listed here in a single call.
p_load(dplyr, ggplot2, psych)

Key Output Metrics

Manhattan

# Display the Manhattan output data
mn_output
# Number of distinct ED values in Manhattan
length(unique(mn_output$ED))
## [1] 1478
# Number of distinct microfilm values in Manhattan
length(unique(mn_output$microfilm))
## [1] 1276
# Number of distinct street names (street_add column) in Manhattan
length(unique(mn_output$street_add))
## [1] 8557
# First 200 distinct street names from the best_match column
head(unique(mn_output$best_match), 200)
##   [1] "FERRY"               "DUANE"               "FRANKFORT"          
##   [4] "WILLIAM"             "ROSE"                "N WILLIAM"          
##   [7] "NEW CHAMBERS"        "PARK ROW"            "GOLD"               
##  [10] "PEARL"               "HAGUE"               "VANDEWATER"         
##  [13] "OAK"                 "CHENUT"              "NEW BOWERY"         
##  [16] "CHENUT ST"           "CHAMBERS"            "BATAVIA"            
##  [19] "ROOSEVELT"           "CHERRY"              "WATER"              
##  [22] NA                    "PECK SLIP"           "DOVER"              
##  [25] "FRONT"               "SOUTH"               "MADISON"            
##  [28] "JAMES"               "OLIVER"              "CATHERINE"          
##  [31] "HENRY"               "CHATHAM SQUARE"      "BROADWAY E"         
##  [34] "JAMES SLIP"          "CATHERINE SLIP"      "CITY HALL PL"       
##  [37] "REPUBLICAN ALY"      "BROADWAY"            "CARDINAL"           
##  [40] "CITY HALL PARK"      "WALKER"              "LEONARD"            
##  [43] "WORTH"               "CENTRE"              "FRANKLIN"           
##  [46] "LAFAYETTE"           "WHITE"               "BAXTER"             
##  [49] "CANAL"               "PARK"                "MULBERRY"           
##  [52] "MOTT"                "PELL"                "DOYERS"             
##  [55] "BOWERY"              "BAYARD"              "ELIZABETH"          
##  [58] "BARROW"              "COMMERCE"            "BEDFORD"            
##  [61] "4 W"                 "JONES"               "BLEECKER"           
##  [64] "CORNELIA"            "W  4"                "MORTON"             
##  [67] "LEROY"               "HUDSON"              "LUKE\\'S PL"        
##  [70] "CARMINE"             "CLARKSON"            "VARICK"             
##  [73] "W  HOUON ST"         "DOWNING"             "1 AVE"              
##  [76] "99 E"                "E  99"               "FIR AVE"            
##  [79] "2 AVE"               "100 E"               "E  100"             
##  [82] "3 AVE"               "101 E"               "98 E"               
##  [85] "E 98"                "97 E"                "E  97"              
##  [88] "E  96"               "96 E"                "E  95"              
##  [91] "E  94"               "95 E"                "94 E"               
##  [94] "E  93"               "93 E"                "92 E"               
##  [97] "91 E"                "90 E"                "E  90"              
## [100] "E  92"               "E  91"               "E  89"              
## [103] "89 E"                "88 E"                "E  88"              
## [106] "E  87"               "87 E"                "86 E"               
## [109] "E  86"               "LEXINGTON AVE"       "PARK AVE"           
## [112] "LEX AVE"             "MADISON AVE"         "E  98"              
## [115] "5 AVE"               "W  112"              "LENOX AVE"          
## [118] "W  111"              "111 W"               "112 W"              
## [121] "W  113"              "113 W"               "114 W"              
## [124] "E  114"              "115 W"               "W  115"             
## [127] "W  116"              "116 W"               "W  117"             
## [130] "117 W"               "W  118"              "118 W"              
## [133] "W  119"              "119 W"               "W  120"             
## [136] "W  121"              "MT MORRIS AVE"       "123 W"              
## [139] "MT MORRIS PL"        "124 W"               "W  126"             
## [142] "126 W"               "W  124"              "W  125"             
## [145] "125 W"               "128 W"               "127 W"              
## [148] "W  127"              "W  129"              "W  128"             
## [151] "129 W"               "130 W"               "131 W"              
## [154] "W  130"              "W  131"              "132 W"              
## [157] "W  132"              "W  133"              "133 W"              
## [160] "DYCKMAN"             "W  205"              "W  206"             
## [163] "209 W"               "213 W"               "0"                  
## [166] "W  215"              "W  216"              "HARLEM RIVER"       
## [169] "228 W"               "227 W"               "CHARLES PL"         
## [172] "MARBLE HL AVE"       "W  218"              "9 AVE"              
## [175] "225 W"               "VAN CORLEAR PL"      "FORT CHARLES PL E"  
## [178] "ADRIAN PL"           "TER VIEW AVE"        "219 W"              
## [181] "SHERMAN AVE"         "BOLTON RD"           "SEAMAN AVE"         
## [184] "PRESCOTT AVE"        "HAWTHORNE"           "COOPER"             
## [187] "ACADEMY"             "W  213"              "10 AVE"             
## [190] "BEAK"                "EMERSON"             "BENNETT AVE"        
## [193] "NAGLE AVE"           "B"                   "C"                  
## [196] "NORTH RIVER"         "FORT WASHINGTON AVE" "NORTHERN AVE"       
## [199] "W  187"              "NICHOLAS AVE"
# Total number of entries from sample dataset in Manhattan
nrow(mn_output)
## [1] 557357
# Tabulate Result Type in Manhattan
# Each entry has 1 of 6 possible match types: (1) Perfect Match,
# (2) Identical Match, (3) Singular Mode, (4) Multiple Modes, (5) NA mode,
# (6) No match. Refer to documentation for more details.
# useNA = "ifany" adds an <NA> column for entries whose result_type is missing;
# without it table() drops those rows silently and the counts no longer add up
# to nrow(mn_output).
table(mn_output$result_type, useNA = "ifany")
## 
##      1      2      3      4      5      6   <NA> 
## 155740  70290 265278  18236   6713   2863  38237
  Shares of all 557,357 Manhattan entries:
  • 27.9% Perfect Match
  • 12.6% Identical Match
  • 47.6% Singular Mode
  • 3.3% Multiple Modes
  • 1.2% NA Mode
  • 0.5% No Match
  • 6.9% Missing result type (result_type is NA)
# Result type 5 (NA mode): key columns of the first 200 such entries
filter(mn_output, result_type == 5) %>%
  select(ED, street_add, best_match, result_type) %>%
  head(n = 200)
# Result type 6 (No match) -- the best match comes out as "0":
# key columns of the first 200 such entries
filter(mn_output, result_type == 6) %>%
  select(ED, street_add, best_match, result_type) %>%
  head(n = 200)
# Flag tabulations for Manhattan. useNA = "ifany" keeps entries whose flag is
# missing visible as an <NA> column, so every table adds up to
# nrow(mn_output); plain table() would drop those rows without notice.

# Tabulate Flagged Streets in Manhattan
table(mn_output$flag_st, useNA = "ifany")
## 
##      0      1   <NA> 
## 491308  27812  38237
# Tabulate Flagged House Number Cleaned in Manhattan
table(mn_output$flag_hn_cleaned, useNA = "ifany")
## 
##      0      1   <NA> 
## 152319   7350 397688
# Tabulate Flagged Filled House Numbers in Manhattan (no missing values)
table(mn_output$flg_filled_hn, useNA = "ifany")
## 
##      0      1 
## 192627 364730

Brooklyn

# Display the Brooklyn output data
bk_output
# Number of distinct ED values in Brooklyn
length(unique(bk_output$ED))
## [1] 1106
# Number of distinct microfilm values in Brooklyn
length(unique(bk_output$microfilm))
## [1] 1527
# Number of distinct street names (street_add column) in Brooklyn
length(unique(bk_output$street_add))
## [1] 7076
# First 200 distinct street names from the best_match column
head(unique(bk_output$best_match), 200)
##   [1] "MARION"            "HOWARD AVE"        "CHAUNCEY"         
##   [4] "RALPH AVE"         "BAINBRIDGE"        "SARATOGA AVE"     
##   [7] "SUMPTER"           NA                  "MCDOUGAL"         
##  [10] "FULTON"            "HULL"              "HERKIMER"         
##  [13] "ATLANTIC AVE"      "BANCROFT PL"       "PRESCOTT PL"      
##  [16] "DEWEY PL"          "LOUIS PL"          "RUSSELL PL"       
##  [19] "OCEAN PL"          "HOPKINSON AVE"     "GUNTHER PL"       
##  [22] "ROCKAWAY AVE"      "SOMERS"            "BROADWAY"         
##  [25] "1 AVE"             "TRUXTON"           "EAERN PKWY"       
##  [28] "EA PKWY"           "PLEASANT PL"       "SACKMAN"          
##  [31] "NORMAN PL"         "JARDINE PL"        "SHERLOCK PL"      
##  [34] "VAN SINDEREN AVE"  "HAVENS PL"         "CONWAY"           
##  [37] "WILLIAMS PL"       "E  NEW YORK AVE"   "ROGERS AVE"       
##  [40] "BEDFORD AVE"       "MARKS AVE"         "FRANKLIN AVE"     
##  [43] "BERGEN"            "DEAN"              "PACIFIC"          
##  [46] "PROSPECT PL"       "PARK PL"           "NORAND AVE"       
##  [49] "GRANT SQ"          "NEW YORK AVE"      "BROOKLYN AVE"     
##  [52] "KINGON AVE"        "ALBANY AVE"        "REVERE PL"        
##  [55] "SAINT MARK'S AVE"  "TROY AVE"          "SCHENECTADY AVE"  
##  [58] "UTICA AVE"         "ROCHEER AVE"       "BUFFALO AVE"      
##  [61] "ETNA"              "RICHMOND"          "RIDGEWOOD AVE"    
##  [64] "LOGAN"             "CHENUT ST"         "EUCLID AVE"       
##  [67] "PINE"              "CRESCENT"          "GRANT AVE"        
##  [70] "ENFIELD"           "NICHOLS AVE"       "LINCOLN AVE"      
##  [73] "RAILROAD AVE"      "HEMLOCK"           "ELDERTS LN"       
##  [76] "CRESCENT TER"      "GLEN"              "WELDON"           
##  [79] "SHERIDAN AVE"      "WEIRFIELD"         "9 AVE"            
##  [82] "FORCE TUBE AVE"    "ARLINGTON AVE"     "DRESDEN"          
##  [85] "HALE AVE"          "NORWOOD AVE"       "GLENMORE AVE"     
##  [88] "LIBERTY AVE"       "FOUNTAIN AVE"      "MILFORD"          
##  [91] "MONTAUK AVE"       "BERRIMAN"          "HL"               
##  [94] "MAGENTA"           "CRYAL ST"          "AUTUMN AVE"       
##  [97] "DOSCHER"           "CRYAL"             "FORBELL AVE"      
## [100] "MCKINLEY AVE"      "PITKIN AVE"        "ATKINS AVE"       
## [103] "BELMONT AVE"       "SUTTER AVE"        "BLAKE AVE"        
## [106] "DUMONT AVE"        "VIENNA AVE"        "HEGEMAN AVE"      
## [109] "NEW LOTS RD"       "ANLEY AVE"         "WORTMAN AVE"      
## [112] "SCHENCK AVE"       "BARBEY"            "JEROME"           
## [115] "ASHFORD"           "CLEVELAND"         "ELTON"            
## [118] "ESSEX"             "SCHENK AVE"        "HENDRIX"          
## [121] "NEWJERSEY AVE"     "PENNSYLVANIA AVE"  "SHEFFIELD AVE"    
## [124] "0"                 "MALTA"             "LOUISIANA AVE"    
## [127] "WILLIAMS AVE"      "SNEDIKER AVE"      "SUMNER PL"        
## [130] "FLUSHING AVE"      "FAYETTE"           "BEAVER"           
## [133] "ELLERY"            "BELVIDERE"         "LOCU ST"          
## [136] "PARK"              "ARION PL"          "BUSHWICK AVE"     
## [139] "MELROSE"           "JEFFERSON"         "TROUTMAN"         
## [142] "MYRTLE AVE"        "GARDEN"            "BREMEN"           
## [145] "MONTIETH"          "NOLL"              "EVERGREEN AVE"    
## [148] "FORRE ST"          "WILLOUGHBY AVE"    "CHARLES PL"       
## [151] "DITMARS"           "SUYDAM"            "HART"             
## [154] "DODWORTH"          "LAWTON"            "CEDAR"            
## [157] "KOSCIUSKO"         "DE KALB AVE"       "CENTRAL AVE"      
## [160] "HAMBURG AVE"       "ANHOPE ST"         "OCKHOLM ST"       
## [163] "ARR ST"            "KOSSUTH PL"        "LAFAYETTE AVE"    
## [166] "VAN BUREN"         "HARMAN"            "HIMROD"           
## [169] "GREENE AVE"        "BLEECKER"          "GEORGE"           
## [172] "FORRE"             "KNICKERBOCKER AVE" "IRVING AVE"       
## [175] "OCKHOLM"           "WYCKOFF AVE"       "NICHOLAS AVE"     
## [178] "CYPRESS AVE"       "SCOTT AVE"         "ANHOPE"           
## [181] "GROVE"             "CENTRAL PL"        "RALPH"            
## [184] "LINDEN"            "GATES AVE"         "PALMETTO"         
## [187] "WOODBINE"          "MADISON"           "RIDGEWOOD PL"     
## [190] "HALSEY"            "CORNELIA"          "JEFFERSON AVE"    
## [193] "PUTNAM AVE"        "HANCOCK"           "ELDERT"           
## [196] "COVERT"            "SCHAEFFER"         "CONEY ISLAND AVE" 
## [199] "R AVE"             "14  E"
# Total number of entries from sample dataset in Brooklyn
nrow(bk_output)
## [1] 371833
# Tabulate Result Type in Brooklyn
# Each entry has 1 of 6 possible match types: (1) Perfect Match,
# (2) Identical Match, (3) Singular Mode, (4) Multiple Modes, (5) NA mode,
# (6) No match. Refer to documentation for more details.
# useNA = "ifany" adds an <NA> column for entries whose result_type is missing;
# without it table() drops those rows silently and the counts no longer add up
# to nrow(bk_output).
table(bk_output$result_type, useNA = "ifany")
## 
##      1      2      3      4      5      6   <NA> 
## 284604  29726  35184   5701   4992   3164   8462
  Shares of all 371,833 Brooklyn entries:
  • 76.5% Perfect Match
  • 8.0% Identical Match
  • 9.5% Singular Mode
  • 1.5% Multiple Modes
  • 1.3% NA Mode
  • 0.9% No Match
  • 2.3% Missing result type (result_type is NA)
# Flag tabulations for Brooklyn. useNA = "ifany" keeps entries whose flag is
# missing visible as an <NA> column, so every table adds up to
# nrow(bk_output); plain table() would drop those rows without notice.

# Tabulate Flagged Streets in Brooklyn
table(bk_output$flag_st, useNA = "ifany")
## 
##      0      1   <NA> 
## 349514  13857   8462
# Tabulate Flagged House Number Cleaned in Brooklyn
table(bk_output$flag_hn_cleaned, useNA = "ifany")
## 
##      0      1   <NA> 
## 176736   2567 192530
# Tabulate Flagged Filled House Numbers in Brooklyn (no missing values)
table(bk_output$flg_filled_hn, useNA = "ifany")
## 
##      0      1 
## 188898 182935